## Setup chunk
knitr::opts_chunk$set(echo = TRUE, include = TRUE, message = FALSE, warning = FALSE)


pacman::p_load(
  "XML",
  "tidyverse",
  "fs",
  "assertthat",
  "stringi",
  "dtw",
  "RTransferEntropy",
  "signal",
  "conflicted",
  "Rcpp",
  "future",
  "fastICA",
  "groupICA",
  "dtw",
  "dplyr",
  "plotly",
  "htmlwidgets",
  "hrbrthemes",
  "zoo"
)
## 
## The downloaded binary packages are in
##  /var/folders/hq/09bb9nmj3bs24myvqjb9lcxr0000gn/T//RtmpMD7QLM/downloaded_packages
## 
## plotly installed
## Warning in pacman::p_load("XML", "tidyverse", "fs", "assertthat", "stringi", : Failed to install/load:
## plotly
conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
## Making sure we are in the right directory
wd <- getwd()
if (basename(wd) != "PerceptionActionExam") {
  setwd("./PerceptionActionExam")
}

                              ##NADIA PATH##
 # data_dir <- path_home() %>% 
 #   path("Documents", "GitHub", "PerceptionActionExam-Clean-up-attempt", "data", "tsvs") ## Nadia's path

                              #KATHARINA PATH##
data_dir <- path_home() %>%
path("Desktop","UNI", "3.semester", "Perception & Action", "PerceptionActionExam", "data", "tsvs") ## Katharina's path


#Aesthetic setup
theme_set(theme_ipsum(base_family = "Times New Roman"))
global_fill_colour <- "#8d5b5a"
aesthetic_palette <- c(
  "#d8aeb5","#c17f8c","#b59592","#9b6f69","#a94f62","#8d5b5a","#684141","#733545","#523438","#48222b","#2f1a1b")
aesthetic_highlight_difference_palette <- c("#d8aeb5","#2f1a1b")

Importing from Qualisys

Loading functions made by Luke for later processing + making sure we are getting the conditions we want.

# The available conditions and their start and end frame indices
# to use the whole file, just set the value for the condition to c(NA, NA)
conditions <- list(
  jointlead = c(NA, NA),
  leadfollow = c(NA, NA)
)

# Calling functions
source("Functions.R")

And so, some more boring stuff…This is just getting the labels so we can make sure that the tracked markers match up with what we expected, really, this shouldn’t go wrong, but if you don’t check this kind of thing you’ll end up scratching your head later wondering why everything broke.

# load the labels from the XML file

# load the XML file
xmlfile <- xmlParse("PerAct23_LabelList.xml")

# get the labels, which are in the following format:
# <QTM_Label_List_Ver_1.00>
#     <Trajectories>
#         <Trajectory>
#             <Name>A_head_top</Name>
#             <Color R="0" G="147" B="0"/>
#         </Trajectory>
#     </Trajectories>
# </QTM_Label_List_Ver_1.00>

# get the trajectory names
traj_names <- xpathSApply(xmlfile, "//Trajectory/Name", xmlValue)

# get the trajectory colors
traj_colors <- xpathSApply(xmlfile, "//Trajectory/Color", xmlAttrs)

# convert the colors to hex
traj_colors <- rgb(
  as.numeric(traj_colors[1,]),
  as.numeric(traj_colors[2,]),
  as.numeric(traj_colors[3,]),
  alpha = 255,
  maxColorValue = 255
)


# combine the names and colors into a data frame
traj_labels <- data.frame(
  traj_names,
  traj_colors,
  stringsAsFactors = FALSE
)

rm(xmlfile, traj_names, traj_colors) #cleaning up

Finally loading in data hehehe (insert elmo meme with fire in the background)

Aditionally, this code is transforming data from a wide format, where each marker’s x, y, and z coordinates are in separate columns, to a long format where all the coordinates are in a single column. It then adds more information about each observation (subject, axis, and marker) before pivoting it back to a wide format.

## the code lists files in the specified directory that have names ending with ".tsv" and stores the list of file paths in the variable traj_files.
traj_files <- fs::dir_ls(data_dir, regexp = "\\.tsv$")

Loading the data in and giving meaningful names

We make a list that contains all the individual dataframes. This list will be called on and iterated through throughout the next steps. (was called traj_data_list in lukes script. Here it is list_of_dataframes)

# Now we can actually load in the data. When we are loading in the data, we are also renaming our list to be the name of the individual groups and conditions 
list_of_dataframes <- list()

for (file_path in traj_files) {
  # Load the data from the file
  traj_data <- process_qtm_tsv(file_path)
  
  # Extract the group number and condition from the filename
  group_number <- sub(".*group([0-9]+).*\\.tsv", "\\1", basename(file_path))
  condition <- traj_data$metadata$condition
  
  # Create a unique identifier for the combination of group number and condition
  group_condition_identifier <- paste0("group", group_number, "_", condition)
  
  # Check if a dataframe with this identifier already exists in the list
  if (group_condition_identifier %in% names(list_of_dataframes)) {
    # If it exists, append the data to the existing dataframe
    list_of_dataframes[[group_condition_identifier]]$data <- rbind(list_of_dataframes[[group_condition_identifier]]$data, traj_data$data)
  } else {
    # If it doesn't exist, create a new dataframe and add it to the list
    list_of_dataframes[[group_condition_identifier]] <- traj_data
  }
}

# Now, list_of_dataframes contains individual lists for each unique combination of group number and condition

rm(group_condition_identifier,group_number)

Adding condition and group columns

This is so we can have nice individual dataframes later on for each group and condition

list_of_dataframes <- lapply(list_of_dataframes, function(x) {
  # Add condition and group information to the data frame
  x$data$condition <- x$metadata$condition
  
  # Set x$data$group to be the list name
  x$data$group <- basename(file_path)
  
  return(x)
})

Cleaning up the list

Making sure the list only contains the data from each group, and not the metadata.

# Assuming list_of_dataframes is a list of data frames with both 'data' and 'metadata'
for (i in seq_along(list_of_dataframes)) {
  x <- list_of_dataframes[[i]]
  
  # Check if 'data' and 'metadata' components exist in each list element
  if (!all(c("data", "metadata") %in% names(x))) {
    warning("List element does not have 'data' and/or 'metadata'. Skipping.")
    next
  }
  
  # Extract condition and group information from the file path
  file_path <- names(list_of_dataframes)[i]
  condition <- sub(".*group[0-9]+_([^_\\.]+).*", "\\1", file_path)
  group_number <- sub(".*group([0-9]+).*", "\\1", file_path)
  
  # Add condition and group information to the data frame
  x$data$condition <- condition
  x$data$group <- paste0("group", group_number)
  
  # Overwrite the original list element with the processed data
  list_of_dataframes[[i]] <- x$data
}

# 'list_of_dataframes' now contains all the processed data frames


rm(x, condition, group_number, i) #Cleaning up

Turning condition and group into factors and ensuring all marker names are not the same

The columns “group” and “condition” shall be looked at as factors, and we are also performing quality control to make sure no markers have weird unique names.

library(dplyr)
library(stringi)  # For stri_replace_last_regex
library(assertthat)  # For assert_that

# Loop through each index of the list
for (i in seq_along(list_of_dataframes)) {
  # Extract the current data frame directly from the list
  df <- list_of_dataframes[[i]]
  
  # Check if 'condition' and 'group' columns exist
  if (!all(c("condition", "group") %in% names(df))) {
    warning(paste("Data frame at index", i, "does not have 'condition' and/or 'group' columns. Skipping."))
    next
  }
  
  # Add factors for condition and group
  df$condition <- factor(df$condition)
  df$group <- factor(df$group)
  
  # Print the data frame index
  cat("Data frame at index:", i, "\n")
  
  # Print the first few rows of the data frame
  print(head(df))
  
  # Ensure all marker names are the same
  marker_names <- unique(df %>% select(contains("_x")) %>% names() %>% stri_replace_last_regex("_x", ""))
  
  assert_that(
    all(marker_names == traj_labels$traj_names),
    msg = paste("Not all marker names are the same in Data frame at index", i)
  )
  
  # Update the original data frame in the list
  list_of_dataframes[[i]] <- df
  
  cat("\n")
}
## Data frame at index: 1 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               627.        -17.5        1743.           544.
## 2     2      0.00333         627.        -17.5        1743.           544.
## 3     3      0.00667         627.        -17.5        1743.           544.
## 4     4      0.01            627.        -17.4        1743.           544.
## 5     5      0.0133          627.        -17.4        1743.           544.
## 6     6      0.0167          626.        -17.3        1743.           544.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 2 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               470.        -73.6        1746.           386.
## 2     2      0.00333         470.        -73.6        1746.           386.
## 3     3      0.00667         470.        -73.6        1746.           386.
## 4     4      0.01            470.        -73.6        1746.           386.
## 5     5      0.0133          470.        -73.5        1746.           386.
## 6     6      0.0167          470.        -73.4        1746.           386.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 3 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               344.        -23.1        1784.           288.
## 2     2      0.00333         344.        -23.1        1784.           288.
## 3     3      0.00667         344.        -23.0        1784.           288.
## 4     4      0.01            344.        -23.0        1784.           288.
## 5     5      0.0133          344.        -23.0        1784.           288.
## 6     6      0.0167          345.        -23.2        1784.           288.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 4 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               351.        -125.        1809.           252.
## 2     2      0.00333         352.        -124.        1810.           253.
## 3     3      0.00667         353.        -123.        1810.           254.
## 4     4      0.01            354.        -121.        1811.           255.
## 5     5      0.0133          355.        -120.        1811.           255.
## 6     6      0.0167          356.        -119.        1812.           256.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 5 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               384.        -59.6        1735.           349.
## 2     2      0.00333         384.        -59.6        1735.           349.
## 3     3      0.00667         384.        -59.6        1735.           349.
## 4     4      0.01            386.        -59.5        1734.           349.
## 5     5      0.0133          386.        -59.5        1734.           349.
## 6     6      0.0167          386.        -59.5        1734.           349.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 6 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               475.        -30.0        1795.             NA
## 2     2      0.00333         475.        -30.1        1795.             NA
## 3     3      0.00667         475.        -30.1        1795.             NA
## 4     4      0.01            473.        -29.8        1795.             NA
## 5     5      0.0133          473.        -29.8        1795.             NA
## 6     6      0.0167          473.        -29.8        1795.             NA
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 7 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               420.        -14.9        1779.           316.
## 2     2      0.00333         420.        -15.6        1779.           317.
## 3     3      0.00667         421.        -16.2        1779.           317.
## 4     4      0.01            420.        -16.4        1779.           318.
## 5     5      0.0133          421.        -17.0        1779.           318.
## 6     6      0.0167          421.        -17.6        1779.           319.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 8 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0               369.         2.45        1778.           272.
## 2     2      0.00333         369.         2.38        1778.           272.
## 3     3      0.00667         369.         2.33        1778.           272.
## 4     4      0.01            368.         2.71        1778.           272.
## 5     5      0.0133          368.         2.66        1778.           272.
## 6     6      0.0167          368.         2.62        1779.           272.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 9 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -595.        -20.6        1744.          -489.
## 2     2      0.00333        -595.        -20.6        1744.          -489.
## 3     3      0.00667        -595.        -20.5        1744.          -489.
## 4     4      0.01           -594.        -21.0        1745.          -489.
## 5     5      0.0133         -594.        -20.9        1745.          -489.
## 6     6      0.0167         -594.        -20.8        1746.          -489.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 10 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -521.        -60.2        1737.          -414.
## 2     2      0.00333        -521.        -60.1        1737.          -414.
## 3     3      0.00667        -521.        -60.1        1737.          -414.
## 4     4      0.01           -521.        -59.9        1737.          -414.
## 5     5      0.0133         -520.        -60.5        1738.          -414.
## 6     6      0.0167         -521.        -59.8        1737.          -414.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 11 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -352.        -221.        1670.          -362.
## 2     2      0.00333        -352.        -221.        1670.          -362.
## 3     3      0.00667        -352.        -221.        1670.          -362.
## 4     4      0.01           -352.        -221.        1670.          -362.
## 5     5      0.0133         -352.        -221.        1670.          -362.
## 6     6      0.0167         -352.        -221.        1670.          -361.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 12 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -496.        -57.7        1767.          -385.
## 2     2      0.00333        -496.        -57.6        1767.          -385.
## 3     3      0.00667        -495.        -57.5        1767.          -384.
## 4     4      0.01           -495.        -57.2        1767.          -384.
## 5     5      0.0133         -493.        -57.6        1768.          -383.
## 6     6      0.0167         -493.        -57.5        1768.          -383.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 13 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -612.        -87.0        1650.          -533.
## 2     2      0.00333        -612.        -87.3        1650.          -533.
## 3     3      0.00667        -611.        -87.6        1650.          -533.
## 4     4      0.01           -611.        -87.9        1650.          -533.
## 5     5      0.0133         -610.        -88.1        1650.          -533.
## 6     6      0.0167         -609.        -88.2        1650.          -534.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 14 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -611.        -93.4        1736.          -555.
## 2     2      0.00333        -611.        -93.5        1735.          -555.
## 3     3      0.00667        -610.        -93.6        1735.          -555.
## 4     4      0.01           -610.        -93.7        1735.          -554.
## 5     5      0.0133         -610.        -93.8        1735.          -554.
## 6     6      0.0167         -610.        -94.0        1736.          -554.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 15 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -862.        -13.9        1761.             NA
## 2     2      0.00333        -862.        -13.8        1761.             NA
## 3     3      0.00667        -862.        -13.8        1761.             NA
## 4     4      0.01           -862.        -13.7        1761.             NA
## 5     5      0.0133         -862.        -13.7        1761.             NA
## 6     6      0.0167         -862.        -13.6        1761.             NA
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
## 
## Data frame at index: 16 
## # A tibble: 6 × 67
##   index elapsed_time A_head_top_x A_head_top_y A_head_top_z A_head_right_x
##   <dbl>        <dbl>        <dbl>        <dbl>        <dbl>          <dbl>
## 1     1      0              -481.        -107.        1619.          -451.
## 2     2      0.00333        -481.        -107.        1618.          -452.
## 3     3      0.00667        -480.        -107.        1618.          -452.
## 4     4      0.01           -480.        -107.        1618.          -452.
## 5     5      0.0133         -480.        -107.        1618.          -453.
## 6     6      0.0167         -480.        -107.        1618.          -453.
## # ℹ 61 more variables: A_head_right_y <dbl>, A_head_right_z <dbl>,
## #   A_head_left_x <dbl>, A_head_left_y <dbl>, A_head_left_z <dbl>,
## #   A_chest_x <dbl>, A_chest_y <dbl>, A_chest_z <dbl>, A_back_x <dbl>,
## #   A_back_y <dbl>, A_back_z <dbl>, A_shoulder_right_x <dbl>,
## #   A_shoulder_right_y <dbl>, A_shoulder_right_z <dbl>,
## #   A_shoulder_left_x <dbl>, A_shoulder_left_y <dbl>, A_shoulder_left_z <dbl>,
## #   A_elbow_right_x <dbl>, A_elbow_right_y <dbl>, A_elbow_right_z <dbl>, …
rm(df)

Going from wide format to long format

Final step before we can work on the dataframes. Currently, the

library(dplyr)
library(tidyr)
library(stringi)  # For string manipulation

# Loop through each index of the list
for (i in seq_along(list_of_dataframes)) {
  # Extract the current data frame directly from the list
  df <- list_of_dataframes[[i]]
  
  # Check if 'condition' and 'group' columns exist
  if (!all(c("condition", "group") %in% names(df))) {
    warning(paste("Data frame at index", i, "does not have 'condition' and/or 'group' columns. Skipping."))
    next
  }
  
  # Pivot the data
  df <- df %>% 
    pivot_longer(
      cols = contains("_x") | contains("_y") | contains("_z"),
      names_to = "marker",
      values_to = "value"
    ) %>%
    mutate(
      subject = stri_replace_first_regex(marker, "^([AB])_.*", "$1"),
      axis = stri_extract_last_regex(marker, "[xyz]$"),
      marker = stri_replace_first_regex(marker, "^[AB]_([a-zA-Z_]+)_[xyz]$", "$1")
    ) %>%
    # Move axes to columns
    pivot_wider(
      names_from = axis,
      values_from = value
    )
  
  # Update the original data frame in the list
  list_of_dataframes[[i]] <- df
  
  # Print the data frame index
  cat("Data frame at index:", i, "\n")
  
  # Print the first few rows of the pivoted data frame
  print(head(df))
  
  cat("\n")
}
## Data frame at index: 1 
## # A tibble: 6 × 9
##   index elapsed_time condition group  marker         subject     x     y     z
##   <dbl>        <dbl> <fct>     <fct>  <chr>          <chr>   <dbl> <dbl> <dbl>
## 1     1            0 jointlead group0 head_top       A        627. -17.5 1743.
## 2     1            0 jointlead group0 head_right     A        544.  44.9 1676.
## 3     1            0 jointlead group0 head_left      A        564. -81.3 1665.
## 4     1            0 jointlead group0 chest          A         NA   NA     NA 
## 5     1            0 jointlead group0 back           A         NA   NA     NA 
## 6     1            0 jointlead group0 shoulder_right A        692. 115.  1480.
## 
## Data frame at index: 2 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker        subject     x       y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>         <chr>   <dbl>   <dbl> <dbl>
## 1     1            0 leadfollow group0 head_top      A        470.  -73.6  1746.
## 2     1            0 leadfollow group0 head_right    A        386.   -6.80 1685.
## 3     1            0 leadfollow group0 head_left     A        398. -135.   1673.
## 4     1            0 leadfollow group0 chest         A        386.  -12.2  1387.
## 5     1            0 leadfollow group0 back          A         NA    NA      NA 
## 6     1            0 leadfollow group0 shoulder_rig… A         NA    NA      NA 
## 
## Data frame at index: 3 
## # A tibble: 6 × 9
##   index elapsed_time condition group   marker         subject     x      y     z
##   <dbl>        <dbl> <fct>     <fct>   <chr>          <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 jointlead group12 head_top       A        344. -23.1  1784.
## 2     1            0 jointlead group12 head_right     A        288.  43.8  1687.
## 3     1            0 jointlead group12 head_left      A         NA   NA      NA 
## 4     1            0 jointlead group12 chest          A        431.  -4.23 1413.
## 5     1            0 jointlead group12 back           A         NA   NA      NA 
## 6     1            0 jointlead group12 shoulder_right A        541. 119.   1549.
## 
## Data frame at index: 4 
## # A tibble: 6 × 9
##   index elapsed_time condition  group   marker        subject     x      y     z
##   <dbl>        <dbl> <fct>      <fct>   <chr>         <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 leadfollow group12 head_top      A        351. -125.  1809.
## 2     1            0 leadfollow group12 head_right    A        252. -103.  1727.
## 3     1            0 leadfollow group12 head_left     A        296. -178.  1700.
## 4     1            0 leadfollow group12 chest         A        416.  -20.1 1439.
## 5     1            0 leadfollow group12 back          A        605.  -34.0 1567.
## 6     1            0 leadfollow group12 shoulder_rig… A        484.   94.1 1607.
## 
## Data frame at index: 5 
## # A tibble: 6 × 9
##   index elapsed_time condition group   marker        subject     x       y     z
##   <dbl>        <dbl> <fct>     <fct>   <chr>         <chr>   <dbl>   <dbl> <dbl>
## 1     1            0 jointlead group13 head_top      A        384.  -59.6  1735.
## 2     1            0 jointlead group13 head_right    A        349.   -7.68 1626.
## 3     1            0 jointlead group13 head_left     A        354  -107.   1624.
## 4     1            0 jointlead group13 chest         A        446.  -41.6  1393.
## 5     1            0 jointlead group13 back          A         NA    NA      NA 
## 6     1            0 jointlead group13 shoulder_rig… A        585    79.5  1516.
## 
## Data frame at index: 6 
## # A tibble: 6 × 9
##   index elapsed_time condition  group   marker         subject     x     y     z
##   <dbl>        <dbl> <fct>      <fct>   <chr>          <chr>   <dbl> <dbl> <dbl>
## 1     1            0 leadfollow group13 head_top       A        475. -30.0 1795.
## 2     1            0 leadfollow group13 head_right     A         NA   NA     NA 
## 3     1            0 leadfollow group13 head_left      A        377. -75.6 1734.
## 4     1            0 leadfollow group13 chest          A         NA   NA     NA 
## 5     1            0 leadfollow group13 back           A         NA   NA     NA 
## 6     1            0 leadfollow group13 shoulder_right A         NA   NA     NA 
## 
## Data frame at index: 7 
## # A tibble: 6 × 9
##   index elapsed_time condition group  marker         subject     x     y     z
##   <dbl>        <dbl> <fct>     <fct>  <chr>          <chr>   <dbl> <dbl> <dbl>
## 1     1            0 jointlead group1 head_top       A        420. -14.9 1779.
## 2     1            0 jointlead group1 head_right     A        316.  57.1 1732.
## 3     1            0 jointlead group1 head_left      A        325. -72.5 1720.
## 4     1            0 jointlead group1 chest          A        345.  16.2 1395.
## 5     1            0 jointlead group1 back           A        556. -35.3 1472.
## 6     1            0 jointlead group1 shoulder_right A        482. 109.  1503.
## 
## Data frame at index: 8 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker         subject     x      y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>          <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 leadfollow group1 head_top       A        369.   2.45 1778.
## 2     1            0 leadfollow group1 head_right     A        272.  79.4  1724.
## 3     1            0 leadfollow group1 head_left      A        270. -51.3  1721.
## 4     1            0 leadfollow group1 chest          A        297.   1.68 1412.
## 5     1            0 leadfollow group1 back           A        525.   3.05 1475.
## 6     1            0 leadfollow group1 shoulder_right A         NA   NA      NA 
## 
## Data frame at index: 9 
## # A tibble: 6 × 9
##   index elapsed_time condition group  marker         subject     x     y     z
##   <dbl>        <dbl> <fct>     <fct>  <chr>          <chr>   <dbl> <dbl> <dbl>
## 1     1            0 jointlead group2 head_top       A       -595. -20.6 1744.
## 2     1            0 jointlead group2 head_right     A       -489. -66.9 1705.
## 3     1            0 jointlead group2 head_left      A       -480.  19.0 1708.
## 4     1            0 jointlead group2 chest          A       -512. -23.3 1431.
## 5     1            0 jointlead group2 back           A       -753. -27.4 1448.
## 6     1            0 jointlead group2 shoulder_right A       -652. -94.9 1522.
## 
## Data frame at index: 10 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker         subject     x      y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>          <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 leadfollow group2 head_top       A       -521.  -60.2 1737.
## 2     1            0 leadfollow group2 head_right     A       -414.  -16.2 1689.
## 3     1            0 leadfollow group2 head_left      A       -473.   48.5 1692.
## 4     1            0 leadfollow group2 chest          A         NA    NA     NA 
## 5     1            0 leadfollow group2 back           A       -702.  -95.6 1442.
## 6     1            0 leadfollow group2 shoulder_right A       -583. -156.  1511.
## 
## Data frame at index: 11 
## # A tibble: 6 × 9
##   index elapsed_time condition group  marker         subject     x      y     z
##   <dbl>        <dbl> <fct>     <fct>  <chr>          <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 jointlead group3 head_top       A       -352. -221.  1670.
## 2     1            0 jointlead group3 head_right     A       -362. -298.  1571.
## 3     1            0 jointlead group3 head_left      A       -302. -232.  1554.
## 4     1            0 jointlead group3 chest          A         NA    NA     NA 
## 5     1            0 jointlead group3 back           A       -644.  -27.3 1493.
## 6     1            0 jointlead group3 shoulder_right A         NA    NA     NA 
## 
## Data frame at index: 12 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker       subject     x        y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>        <chr>   <dbl>    <dbl> <dbl>
## 1     1            0 leadfollow group3 head_top     A       -496.  -57.7   1767.
## 2     1            0 leadfollow group3 head_right   A       -385.  -88.4   1714.
## 3     1            0 leadfollow group3 head_left    A       -392.    0.722 1725.
## 4     1            0 leadfollow group3 chest        A       -455.  -23.1   1391.
## 5     1            0 leadfollow group3 back         A       -681.  -15.4   1457.
## 6     1            0 leadfollow group3 shoulder_ri… A       -580. -149.    1486.
## 
## Data frame at index: 13 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker        subject     x       y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>         <chr>   <dbl>   <dbl> <dbl>
## 1     1            0 leadfollow group4 head_top      A       -612.  -87.0  1650.
## 2     1            0 leadfollow group4 head_right    A       -533.    2.78 1627.
## 3     1            0 leadfollow group4 head_left     A       -616.   34.7  1621.
## 4     1            0 leadfollow group4 chest         A       -526.  -36.4  1319.
## 5     1            0 leadfollow group4 back          A         NA    NA      NA 
## 6     1            0 leadfollow group4 shoulder_rig… A       -590. -207.   1398.
## 
## Data frame at index: 14 
## # A tibble: 6 × 9
##   index elapsed_time condition group  marker         subject     x      y     z
##   <dbl>        <dbl> <fct>     <fct>  <chr>          <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 jointlead group6 head_top       A       -611.  -93.4 1736.
## 2     1            0 jointlead group6 head_right     A       -555. -159.  1638.
## 3     1            0 jointlead group6 head_left      A       -567.  -61.0 1634.
## 4     1            0 jointlead group6 chest          A       -685. -135.  1391.
## 5     1            0 jointlead group6 back           A         NA    NA     NA 
## 6     1            0 jointlead group6 shoulder_right A       -799. -263.  1493.
## 
## Data frame at index: 15 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker         subject     x     y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>          <chr>   <dbl> <dbl> <dbl>
## 1     1            0 leadfollow group6 head_top       A       -862. -13.9 1761.
## 2     1            0 leadfollow group6 head_right     A         NA   NA     NA 
## 3     1            0 leadfollow group6 head_left      A         NA   NA     NA 
## 4     1            0 leadfollow group6 chest          A       -740. -78.3 1396.
## 5     1            0 leadfollow group6 back           A         NA   NA     NA 
## 6     1            0 leadfollow group6 shoulder_right A         NA   NA     NA 
## 
## Data frame at index: 16 
## # A tibble: 6 × 9
##   index elapsed_time condition  group  marker         subject     x      y     z
##   <dbl>        <dbl> <fct>      <fct>  <chr>          <chr>   <dbl>  <dbl> <dbl>
## 1     1            0 leadfollow group8 head_top       A       -481. -107.  1619.
## 2     1            0 leadfollow group8 head_right     A       -451.  -13.7 1553.
## 3     1            0 leadfollow group8 head_left      A       -549.  -30.4 1560.
## 4     1            0 leadfollow group8 chest          A         NA    NA     NA 
## 5     1            0 leadfollow group8 back           A       -584. -213.  1329.
## 6     1            0 leadfollow group8 shoulder_right A       -417. -220.  1379.
# Now 'list_of_dataframes' contains all the modified data frames
rm(df, i, marker_names)

Sorting by time

# Loop through each dataframe in the list and arrange by 'elapsed_time'
list_of_dataframes <- lapply(list_of_dataframes, function(df) {
  df %>% arrange(elapsed_time)
})

Saving CSVs

Now we have succesfully converted the files from Qualysis into dataframes that we can work on throughout the project, we will save each dataframe to a seperate folder. We also clean the environment of any values and dataframes that will not be useful going forward.

# Load necessary library
library(tidyverse)

# Check if the list is named; if not, you might need to assign names
if (is.null(names(list_of_dataframes))) {
  warning("The list_of_dataframes is not named. Data frames will be saved with index-based names.")
  names(list_of_dataframes) <- paste0("dataframe_", seq_along(list_of_dataframes))
}

# Create a sub-directory for the CSV files if it doesn't already exist
dir.create("data/raw_mocap_csv", showWarnings = FALSE, recursive = TRUE)

# Loop through each data frame in the list
for (df_name in names(list_of_dataframes)) {
  # Define the file path and name for each CSV within the data/csv_files directory
  file_path <- paste0("data/raw_mocap_csv/", df_name, ".csv")

  # Save the data frame to a CSV file
  write_csv(list_of_dataframes[[df_name]], file_path)
}

# Inform the user that the operation is complete
cat("All data frames have been saved in the 'data/csv_files' directory with their respective names.\n")
## All data frames have been saved in the 'data/csv_files' directory with their respective names.

A final clean before we work with our CSV files

#CLEANING YAY
rm(conditions, traj_data,traj_labels, i, marker_names, traj_files, data_dir, df_name, file_path, traj_files)

Pre-processing: Trimming to 30 seconds, NA’s, choosing markers.

We are only interested in a short frame of time within all the recorded data. However, since we will run into issues if we trim before gapfilling, we will save into two seperate dataframes: one that is trimmed into 30 seconds but not gapfilled, and one that is gapfilled before trimming to 30 seconds. We can then use the non-gapfilled version to check for NAs, only within the timeframe we are actually interested in.

list_of_dataframes_unfilled <- list_of_dataframes

Gap-filling

We now commence with gap filling one of the dataframes. Prior to doing this, we ran all the chunks in the NA-checking section to identify the most fitting markers; in order to save computational power, this section is now placed in the front to only visualise the relevant 30 seconds. For a detailed walkthrough of how we decided which markers to use, see following sections.

The reason why we gap-fill before trimming, is in order to account for all the NA’s that might exist in the intervals of trimming. This could result in sequences of NA’s towards the end and the beginning of the relevant sequence. When writing this document roughly 6000 NAs were eliminated when changing the order of gap-filling and trimming.

In order to later showcase the amount of NA’s in the other markers and to validify our choice of markers, the unfilled dataframe is also not filtered, and keeps data for every marker.

# get the markers of interest
markers_of_interest <- c(
  "hand_right",
  "head_left",
  "hand_left",
  "chest"
)

# Apply the filter to each dataframe in list_of_dataframes
list_of_dataframes <- lapply(list_of_dataframes, function(df) {
  df %>% dplyr::filter(marker %in% markers_of_interest)
})

One of the lists is now used to gap-fill while the other stays empty:

# Assuming markers_of_interest is defined and has at least 4 elements
# Assuming gap_fill_linear is a function that performs linear gap filling

for (i in seq_along(list_of_dataframes)) {
  # Access the dataframe
  df <- list_of_dataframes[[i]]

  # Extract a representative 'group' and 'condition' value (assuming they are consistent within each dataframe)
  group_value <- unique(df$group)[1]
  condition_value <- unique(df$condition)[1]

  # Iterate over selected markers
  for (sel_idx in 1:4) {
    # Plot before gap-filling
    plot_before <- df %>% 
      dplyr::filter(marker %in% markers_of_interest, marker == markers_of_interest[sel_idx]) %>%
      ggplot(aes(x = elapsed_time, y = x, color = subject)) +
      geom_line() +
      facet_wrap(~condition) +
      scale_color_manual(values= aesthetic_highlight_difference_palette)+
      labs(
        x = "Elapsed time",
        y = "Marker X position",
        title = paste("Marker", markers_of_interest[sel_idx], "X position before gap filling"),
        subtitle = paste("Group:", group_value, " | ", "Condition:", condition_value)
      )
    print(plot_before)

    # Apply the linear gap fill function to each column, by condition
    df <- df %>% 
      dplyr::group_by(condition, subject, marker) %>%
      dplyr::mutate(across(c(x, y, z), ~ gap_fill_linear(.)))

    # Update the dataframe in the list
    list_of_dataframes[[i]] <- df

    # Plot after gap-filling
    plot_after <- df %>% 
      dplyr::filter(marker %in% markers_of_interest, marker == markers_of_interest[sel_idx]) %>%
      ggplot(aes(x = elapsed_time, y = x, color = subject)) +
      geom_line() +
      facet_wrap(~condition) +
      scale_color_manual(values= aesthetic_highlight_difference_palette)+
      labs(
        x = "Elapsed time",
        y = "Marker X position",
        title = paste("Marker", markers_of_interest[sel_idx], "X position after gap filling"),
        subtitle = paste("Group:", group_value, " | ", "Condition:", condition_value)
      )
    print(plot_after)
    # Save the plot if needed
    # ggsave(...) - add the appropriate ggsave call here if needed
  }
}

# At this point, list_of_dataframes is updated with the gap-filled data

# Combine all dataframes into a single dataframe
combined_df <- dplyr::bind_rows(list_of_dataframes, .id = "group")

rm(plot_after, plot_before, df, sel_idx, i, condition_value, group_value)

Trimming

Before proceeding we must trim the trajectories to accurately match the window of data collection - meaning, we must identify the 30 second window from which the t-pose starts.

Our original dataframes have a frame every 0,003rd second, so in order to save computational power, we simplify the dataframe for visualisation. In this case we will have points every half second. In doing so we can visually identify the point of t-pose start and end.

When we trim both frames, we effectively create a prior and posterior version of our data, which will later be used for NA-checking, euclidian distance, etc.

# Create a new list with processed dataframes without overwriting the original list
every_half_second_df_list <- lapply(list_of_dataframes, function(df) {
  df %>%
    group_by(subject, marker) %>%
    slice(seq(1, n(), by = 151)) %>%
    ungroup()
})

Now, every_half_second_df_list contains the simplified versions of the dataframes: the version with data every half second. Using this list, we can now make 3d plots with an included timeframe and identify the t-poses and effectively our 30 second window.

# Create an empty list to store the plots
plot_list <- list()

# Iterate through the simplified dataframes
for (i in seq_along(every_half_second_df_list)) {
  df_name <- names(every_half_second_df_list)[i]
  df <- every_half_second_df_list[[i]]
  
  # Create the base plot
  fig <- plotly::plot_ly(df, 
                 x = ~x, 
                 y = ~y, 
                 z = ~z, 
                 type = "scatter3d", 
                 mode = "markers", 
                 size = 2,
                 frame = ~elapsed_time,
                 marker = list(size = 4), ## Adjusting the marker size
                 color = ~subject)
  
  fig <- fig %>% plotly::layout(
    scene = list(
      xaxis = list(title = "X-axis", range = c(-800, 800)), 
      yaxis = list(title = "Y-axis", range = c(-800, 800)),
      zaxis = list(title = "Z-axis", range = c(0, 1800)),
      aspectmode = "manual",
      aspectratio = list(x = 1, y = 1, z = 1)
    ),
    xaxis = list(title = "X-axis"), 
    yaxis = list(title = "Y-axis"),
    zaxis = list(title = "Z-axis"))
  
  fig <- fig %>% ## adding and changing text
    plotly::layout(title = list(text = df_name, y = 0.9), 
                   font=list(size=15, family = "Times new roman"),
                   legend = list(title = list(text = "markers")))
  
  plot_list[[i]] <- fig
}

# Now plot_list contains all the plots for each simplified dataframe

# Create the 'results' directory if it doesn't exist
if (!dir.exists("results")) {
  dir.create("results")
}

# Create the '3D plots' subdirectory inside 'results' if it doesn't exist
subdir <- "results/3D plots"
if (!dir.exists(subdir)) {
  dir.create(subdir)
}

# Loop through the list of plots and save each one
for (i in seq_along(plot_list)) {
  plot <- plot_list[[i]]
  filename <- paste0(subdir, "/plot_", i, ".html") # Save files in the subdir
  saveWidget(plot, file = filename)
}
plot_list
## [[1]]
## 
## [[2]]
## 
## [[3]]
## 
## [[4]]
## 
## [[5]]
## 
## [[6]]
## 
## [[7]]
## 
## [[8]]
## 
## [[9]]
## 
## [[10]]
## 
## [[11]]
## 
## [[12]]
## 
## [[13]]
## 
## [[14]]
## 
## [[15]]
## 
## [[16]]
rm(df, fig, plot, i, df_name, filename, current_df)

Having manually identified the relevant time-frames, we create a new dataframe and input the relevant values of elapsed time.

start_time_df <- data.frame(
  group_and_condition = c("group0_jointlead",
                          "group0_leadfollow",
                          "group1_jointlead",
                          "group1_leadfollow",
                          "group12_jointlead",
                          "group12_leadfollow",
                          "group13_jointlead",
                          "group13_leadfollow",
                          "group2_jointlead",
                          "group2_leadfollow",
                          "group3_jointlead",
                          "group3_leadfollow",
                          "group4_leadfollow",
                          "group6_jointlead",
                          "group6_leadfollow",
                          "group8_leadfollow"),
  elapsed_time_at_start = as.numeric(c("17",
                            "35",
                            "12",
                            "50",
                            "33",
                            "28",
                            "37",
                            "94",
                            "18",
                            "31",
                            "12",
                            "42",
                            "37",
                            "14",
                            "29",
                            "52")
                            )
)
#We no longer need the simplified dataframe :)
rm(every_half_second_df_list)

Now we will iterate through all the group dataframes and pick the elapsed_time value associated. First we do this for the gap-filled list:

# Iterate through rows of start_time_df and extract subsets
for (i in 1:nrow(start_time_df)) {
  df_name <- start_time_df$group_and_condition[i]
  start_time <- start_time_df$elapsed_time_at_start[i]
  
  # Find the corresponding dataframe in list_of_dataframes, if it exists
  if (df_name %in% names(list_of_dataframes)) {
    # Access the dataframe directly from the list
    df <- list_of_dataframes[[df_name]]
    
    # Arrange the dataframe by elapsed_time
    df <- df %>% dplyr::arrange(elapsed_time)
    
    # Extract the subset based on the starting value
    starting_index <- which(df$elapsed_time == start_time)[1]
    ending_index <- which(df$elapsed_time == start_time + 30)[1]
    
    if (!is.na(starting_index) && !is.na(ending_index)) {
      # Update the original dataframe in the list with the subset
      list_of_dataframes[[df_name]] <- df[starting_index:ending_index, ]
    }
    # No else block needed, if indices are not found, the original dataframe remains unchanged
  }
  # No else block needed, if the dataframe does not exist in the list, nothing happens
}

# Now, list_of_dataframes contains the updated dataframes with the subsets

Next we will trim the unfilled list:

# Iterate through rows of start_time_df and extract subsets
for (i in 1:nrow(start_time_df)) {
  df_name <- start_time_df$group_and_condition[i]
  start_time <- start_time_df$elapsed_time_at_start[i]
  
  # Find the corresponding dataframe in list_of_dataframes, if it exists
  if (df_name %in% names(list_of_dataframes_unfilled)) {
    # Access the dataframe directly from the list
    df <- list_of_dataframes_unfilled[[df_name]]
    
    # Arrange the dataframe by elapsed_time
    df <- df %>% dplyr::arrange(elapsed_time)
    
    # Extract the subset based on the starting value
    starting_index <- which(df$elapsed_time == start_time)[1]
    ending_index <- which(df$elapsed_time == start_time + 30)[1]
    
    if (!is.na(starting_index) && !is.na(ending_index)) {
      # Update the original dataframe in the list with the subset
      list_of_dataframes_unfilled[[df_name]] <- df[starting_index:ending_index, ]
    }
    # No else block needed, if indices are not found, the original dataframe remains unchanged
  }
  # No else block needed, if the dataframe does not exist in the list, nothing happens
}

# Now, list_of_dataframes contains the updated dataframes with the subsets

#cleaning up again! 
rm(df, df_name, ending_index, first_index, i, start_time, starting_index, start_time_df)

Visualising gap-filling on trimmed data

First we visualise the untrimmed dataset:

# Assuming markers_of_interest is defined and has at least 4 elements
# Assuming gap_fill_linear is a function that performs linear gap filling

for (i in seq_along(list_of_dataframes_unfilled)) {
  # Access the dataframe
  df <- list_of_dataframes_unfilled[[i]]

  # Create a combined plot for each group and condition, faceted by marker
  unique_groups <- unique(df$group)
  unique_conditions <- unique(df$condition)

  for (group in unique_groups) {
    for (condition in unique_conditions) {
      plot_combined <- df %>% 
        dplyr::filter(group == group, condition == condition, marker %in% markers_of_interest) %>%
        ggplot(aes(x = elapsed_time, y = x, color = subject)) +
        geom_line() +
        scale_color_manual(values= aesthetic_highlight_difference_palette)+
        facet_wrap(~marker, nrow = 2, ncol = 2) +  # Arrange facets in a 2x2 grid by marker
        labs(
          x = "Elapsed time",
          y = "Marker X position",
          title = paste("Before Gap-filling"),
          subtitle = paste("Group:", group, "| Condition:", condition, "- Marker X Positions")
        )
      print(plot_combined)
    }
  }
}

There are a couple of errors clearly visible in the plots, that will have an impact on our analysis. For instance, group 8 gets cut off too early. Meaning the recording was either stopped before it should have been, or has been cut short during the session afterwards.

Then we visualise the data after gap-filling:

# Assuming markers_of_interest is defined and has at least 4 elements
# Assuming gap_fill_linear is a function that performs linear gap filling

for (i in seq_along(list_of_dataframes)) {
  # Access the dataframe
  df <- list_of_dataframes[[i]]

  # Create a combined plot for each group and condition, faceted by marker
  unique_groups <- unique(df$group)
  unique_conditions <- unique(df$condition)

  for (group in unique_groups) {
    for (condition in unique_conditions) {
      plot_combined <- df %>% 
        dplyr::filter(group == group, condition == condition, marker %in% markers_of_interest) %>%
        ggplot(aes(x = elapsed_time, y = x, color = subject)) +
        geom_line() +
        scale_color_manual(values= aesthetic_highlight_difference_palette)+
        facet_wrap(~marker, nrow = 2, ncol = 2) +  # Arrange facets in a 2x2 grid by marker
        labs(
          x = "Elapsed time",
          y = "Marker X position",
          title = paste("After Gap-filling"),
          subtitle = paste("Group:", group, "| Condition:", condition, "- Marker X Positions")
        )
      print(plot_combined)
    }
  }
}

NA checking pre-gapfill

As clarified prior to gap filling, the following sections were used to choose which markers to include in our analysis. In the following we will use the list of dataframes that have been cut to the relevant 30 seconds yet not gap-filled to give a thorough overview of the actually relevant amount of NAs.

For the most accurate analysis we wanted to include the following markers: Head, both hands and elbows, and chest. We checked for NA’s to verify none of these markers have too many NAs.

combined_df <- dplyr::bind_rows(list_of_dataframes_unfilled) %>% 
  dplyr::ungroup()  

combined_df %>% 
  dplyr::summarise(Total_NAs = sum(is.na(x)))
## # A tibble: 1 × 1
##   Total_NAs
##       <int>
## 1    505680
combined_df %>% 
  dplyr::summarise(Total_NON_NAs = sum(!is.na(x)))
## # A tibble: 1 × 1
##   Total_NON_NAs
##           <int>
## 1       2518336
#NA
na_pre_gap <- combined_df %>%
  group_by(marker) %>%
  summarise(na_count = sum(is.na(x))) %>% 
  arrange(na_count)

#NON NA
non_na_pre_gap <-combined_df %>%
  group_by(marker) %>%
  summarise(non_na_count = sum(!is.na(x))) %>% 
  arrange(non_na_count)

print(na_pre_gap)
## # A tibble: 11 × 2
##    marker         na_count
##    <chr>             <int>
##  1 head_top           1994
##  2 head_left         16513
##  3 shoulder_right    17522
##  4 shoulder_left     26239
##  5 head_right        27069
##  6 chest             42472
##  7 hand_right        47092
##  8 hand_left         53092
##  9 elbow_right       76457
## 10 elbow_left        87607
## 11 back             109623
print(non_na_pre_gap)
## # A tibble: 11 × 2
##    marker         non_na_count
##    <chr>                 <int>
##  1 head_top             142022
##  2 back                 178377
##  3 elbow_left           200393
##  4 elbow_right          211543
##  5 hand_left            234908
##  6 hand_right           240908
##  7 chest                245528
##  8 head_right           260931
##  9 shoulder_left        261761
## 10 shoulder_right       270478
## 11 head_left            271487
rm(non_na_pre_gap, na_pre_gap)

Identifying who has an extra headmarker

We noticed that head_top didn’t have a lot of NA’s, so we checked whether or not it was the extra marker for A

  list_of_dataframes_unfilled$group0_jointlead %>%
  dplyr::filter(subject == "B") %>%
  distinct(marker)
## # A tibble: 10 × 1
##    marker        
##    <chr>         
##  1 head_right    
##  2 head_left     
##  3 chest         
##  4 back          
##  5 shoulder_right
##  6 shoulder_left 
##  7 elbow_right   
##  8 elbow_left    
##  9 hand_right    
## 10 hand_left
## Subject B has no head_top heheheheheee

head_top does not appear as a marker in the list, given subject B was only given 2 dots on their head and A was given two; therefore, head_top is the “missing” marker and we therefore cannot use it to compare subjects.

Visualising the NAs

# Combine all dataframes in list_of_dataframes into one dataframe
combined_df <- dplyr::bind_rows(list_of_dataframes_unfilled, .id = "group")

# Identifying columns with NAs and processing
na_columns <- colnames(combined_df)[colSums(is.na(combined_df)) > 0]
columns_to_summarize <- c("marker", na_columns, "group", "condition")

# Summarize the number of NAs for each marker
result <- combined_df %>%
  dplyr::select(dplyr::all_of(columns_to_summarize)) %>%
  dplyr::group_by(group, marker, condition) %>%
  dplyr::summarise(across(everything(), ~ sum(is.na(.))), .groups = "drop") %>%
  dplyr::arrange(marker, condition)

# Plotting with facet wrap
p <- ggplot(result, aes(x = marker, y = x, fill = marker)) +
  geom_bar(stat = "identity", position = "dodge", show.legend = FALSE) +
  labs(title = "NA's in Combined Dataframes", x = "Marker", y = "Number of NA") +
  facet_wrap(~ condition) +
  coord_flip()+
  scale_fill_manual(values = aesthetic_palette)

# Print the combined plot
print(p)

rm(p, result, columns_to_summarize, na_columns)
# Now we can get the longest sequence of NAs for each marker
# Function to calculate the longest NA run length
max_na_run_length <- function(vec) {
  rle_na <- rle(is.na(vec))
  max(rle_na$lengths[rle_na$values], na.rm = TRUE, default = 0)
}

# Now we can get the longest sequence of NAs for each marker
longest_na_seq <- combined_df %>% 
  group_by(condition, marker) %>%
  summarise(
    x_length = max_na_run_length(x),
    .groups = "drop"
  )

# plot to check if it is acceptable
longest_na_seq %>% 
  ggplot(aes(x = marker, y = x_length, fill=marker)) +
  geom_col(
    show.legend = FALSE
  ) +
  coord_flip() +
  facet_wrap(c(~condition)) +
  labs(
    x = "Marker",
    y = "Longest NA sequence",
    title = "Longest NA sequence by condition"
  )+
  scale_fill_manual(values = aesthetic_palette)+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Unfortunately, the elbow markers have a lot of NA’s. According to our plots, the head markers are consistently low in NA’s, while the distribution of NA’s for hands and chest is not consistent throughout the groups, we continue with checking the euclydian distance to further verify the validity of choosing said markers. However, the head_top is only present for participant A, and therefore we must choose either head_left or head_right. Since head_right has fewer NA’s and the longest sequence is also smaller, we proceed with head_right

Euclidean distance

# Iterate directly over the dataframes in list_of_dataframes
for (df in list_of_dataframes_unfilled) {
  # Calculate the euclidean distance between each marker (using x, y, z)
  # We will do this by subject, marker, and axis
  marker_distances <- df %>% 
    dplyr::group_by(subject, marker) %>%
    dplyr::arrange(index) %>%
    dplyr::mutate(
      diff_x = x - dplyr::lag(x, 1),
      diff_y = y - dplyr::lag(y, 1),
      diff_z = z - dplyr::lag(z, 1)
    ) %>%
    dplyr::mutate(
      euclidean_distance = sqrt(diff_x^2 + diff_y^2 + diff_z^2)
    )

  # Plot the series for each marker, and see if anything stands out
  plot <- ggplot(marker_distances, aes(x = index, y = euclidean_distance)) +
    geom_line(aes(color = factor(marker)), linewidth = 1.25) +
    theme_minimal() +
    facet_wrap(~condition + subject) +
    labs(
      x = "Index",
      y = "Euclidean distance",
      title = "Euclidean distance from the previous frame by marker",
      subtitle = paste( df$group[1], "Condition:", df$condition[1])
    )+
  scale_color_manual(values = aesthetic_palette)
  
  # Print the plot
  print(plot)
}

rm(df, df_name, marker_distances, plot)
# Assuming list_of_dataframes is a list of dataframes

for (df in list_of_dataframes_unfilled) {
  # Calculate the euclidean distance between each marker (using x, y, z)
  # We will do this by subject, marker, and axis
  marker_distances <- df %>% 
    dplyr::group_by(subject, marker) %>%
    dplyr::arrange(index) %>%
    dplyr::mutate(
      diff_x = x - dplyr::lag(x, 1),
      diff_y = y - dplyr::lag(y, 1),
      diff_z = z - dplyr::lag(z, 1)
    ) %>%
    dplyr::mutate(
      euclidean_distance = sqrt(diff_x^2 + diff_y^2 + diff_z^2)
    ) %>%
    # Filter only the specified markers
    dplyr::filter(marker %in% c("head_right", "hand_right", "hand_left", "chest"))

  # Plot the series for each marker, and see if anything stands out
  plot <- ggplot(marker_distances, aes(x = index, y = euclidean_distance)) +
    geom_line(aes(color = factor(marker)), linewidth = 1.25) +
    theme_minimal() +
    facet_wrap(~condition + subject) +
    labs(
      x = "Index",
      y = "Euclidean distance",
      title = "Euclidean distance from the previous frame by marker",
      subtitle = paste("Group:", df$group[1], "Condition:", df$condition[1])
    )+
  scale_color_manual(values = c("#d8aeb5","#a94f62","#8d5b5a","#2f1a1b"))
  
  # Print the plot
  print(plot)
}

rm(df, marker_distances, plot)

Inspecting the raw data by plotting the x-coordinates

(inspecting to further validate the selection of markers -> the markers chosen account for most of movement)

## Making a for loop that plots x-coordinates for all markers in all groups
for (df in list_of_dataframes_unfilled) {
 # Create ggplot
  plot <- ggplot(df, aes(x = x, y = marker, color = global_fill_colour)) +
    geom_point() +
    ggtitle(paste("Group:", df$group[1], "Condition:", df$condition[1]))+
    scale_color_manual(values = global_fill_colour)+
    theme(legend.position = "none") 
  
  # Print the plot
  print(plot)
}

rm(df, plot)

It is clear the quality of the dataset differs immensely, and some groups have significantly more NA’s than others.

NA-check only relevant markers pre gap-fill

# Apply the filter to each dataframe in list_of_dataframes
list_of_dataframes_unfilled <- lapply(list_of_dataframes_unfilled, function(df) {
  df %>% dplyr::filter(marker %in% markers_of_interest)
})
#overwriting the combined
combined_df <- dplyr::bind_rows(list_of_dataframes_unfilled) %>% 
  dplyr::ungroup()  

#Totals
combined_df %>% 
  dplyr::summarise(Total_NAs = sum(is.na(x)))
## # A tibble: 1 × 1
##   Total_NAs
##       <int>
## 1    159169
combined_df %>% 
  dplyr::summarise(Total_NON_NAs = sum(!is.na(x)))
## # A tibble: 1 × 1
##   Total_NON_NAs
##           <int>
## 1        992831
#NA

na_post_gap <- combined_df %>%
  group_by(marker) %>%
  summarise(na_count = sum(is.na(x)))

#NON NA
non_na_post_gap <- combined_df %>%
  group_by(marker) %>%
  summarise(non_na_count = sum(!is.na(x)))

print(non_na_post_gap)
## # A tibble: 4 × 2
##   marker     non_na_count
##   <chr>             <int>
## 1 chest            245528
## 2 hand_left        234908
## 3 hand_right       240908
## 4 head_left        271487
print(na_post_gap)
## # A tibble: 4 × 2
##   marker     na_count
##   <chr>         <int>
## 1 chest         42472
## 2 hand_left     53092
## 3 hand_right    47092
## 4 head_left     16513
rm(non_na_post_gap, na_post_gap)

NA check post gap-fill

#overwriting the combined with the gap filled from the list
combined_df <- dplyr::bind_rows(list_of_dataframes) %>% 
  dplyr::ungroup()  

#Totals
combined_df %>% 
  dplyr::summarise(Total_NAs = sum(is.na(x)))
## # A tibble: 1 × 1
##   Total_NAs
##       <int>
## 1     77788
combined_df %>% 
  dplyr::summarise(Total_NON_NAs = sum(!is.na(x)))
## # A tibble: 1 × 1
##   Total_NON_NAs
##           <int>
## 1       1074228
#NA

na_post_gap <- combined_df %>%
  group_by(marker) %>%
  summarise(na_count = sum(is.na(x)))

#NON NA
non_na_post_gap <- combined_df %>%
  group_by(marker) %>%
  summarise(non_na_count = sum(!is.na(x)))

print(non_na_post_gap)
## # A tibble: 4 × 2
##   marker     non_na_count
##   <chr>             <int>
## 1 chest            264836
## 2 hand_left        260647
## 3 hand_right       267707
## 4 head_left        281038
print(na_post_gap)
## # A tibble: 4 × 2
##   marker     na_count
##   <chr>         <int>
## 1 chest         23164
## 2 hand_left     27353
## 3 hand_right    20293
## 4 head_left      6978
rm(non_na_post_gap, na_post_gap)

We still have a lot of NA’s caused by inadequate registering of markers during experiment. After gap-filling the total NAs have gone from 169725 to 93280.

Saving pre-processed files:

We will continue analysis in a separate file

# Create a sub-directory for the CSV files if it doesn't already exist
dir.create("data/mocap_data_prepped", showWarnings = FALSE, recursive = TRUE)

# Loop through each data frame in the list
for (df_name in names(list_of_dataframes)) {
  # Define the file path and name for each CSV within the data/csv_files directory
  file_path <- paste0("data/mocap_data_prepped/", df_name, ".csv")

  # Save the data frame to a CSV file
  write_csv(list_of_dataframes[[df_name]], file_path)
}

# Inform the user that the operation is complete
cat("All data frames have been saved in the 'mocap_data_prepped' directory with their respective names.\n")
## All data frames have been saved in the 'mocap_data_prepped' directory with their respective names.

-------